HMM and masting

1 Load data

raw_data <- read.csv(file.path(wd, 'data',  'ebms', 'Beech_tree-ring_masting_data.csv'))
clim_data <- readRDS(file.path(wd, 'data',  'ebms', 'era5land_sitesextract.rds'))

raw_data$uniqueID <- paste0(raw_data$site.ID, "_", raw_data$tree.ID)
raw_data <- na.omit(raw_data[,c('site.ID', 'uniqueID', 'year', 'seeds')])

uniq_tree_ids <- unique(raw_data$uniqueID)
N_trees <- length(uniq_tree_ids)
N_sites <- length(unique(raw_data$site.ID))
first_year <- min(raw_data$year)
last_year <- max(raw_data$year)
max_years <- first_year:max(raw_data$year)
N_max_years <- length(max_years)

We have 57 trees on 7 sites. Observations were collected from 1980 to 2022.

2 Format data

seed_counts <- c()
years <- c()
prevsummer_temps <-c()
spring_temps <-c()
gdd_lastfrost <-c()
N_years <- c()
idx <- 1
tree_start_idxs <- c()
tree_end_idxs <- c()
for (tid in uniq_tree_ids) {
  
  raw_data_tree <- raw_data[raw_data$uniqueID == tid,]
  
  years_tree <- raw_data_tree$year-first_year+1
  years <- c(years, years_tree)
  
  N_years_tree <- length(years_tree)
  N_years <- c(N_years, N_years_tree)
  
  seed_count_tree <- raw_data_tree$seeds
  seed_counts <- c(seed_counts, seed_count_tree)
  
  prevsummer_temps_tree <- clim_data[clim_data$site.ID == raw_data_tree$site.ID[1] & clim_data$year %in% (max_years-1), 'meantmax_ja'] # all years, even those unobserved
  prevsummer_temps <- c(prevsummer_temps, prevsummer_temps_tree)
  
  spring_temps_tree <- clim_data[clim_data$site.ID == raw_data_tree$site.ID[1] & clim_data$year %in% (max_years-1), 'meantmean_am'] # all years, even those unobserved
  spring_temps <- c(spring_temps, spring_temps_tree)
  
  gdd_lastfrost_tree <- clim_data[clim_data$site.ID == raw_data_tree$site.ID[1] & clim_data$year %in% (max_years-1), 'gdd_b5_tolastfrost']/10 # all years, even those unobserved, in *10degC
  gdd_lastfrost <- c(gdd_lastfrost, gdd_lastfrost_tree)
  
  tree_start_idxs <- c(tree_start_idxs, idx)
  idx <- idx + N_years_tree
  tree_end_idxs <- c(tree_end_idxs, idx - 1)
  
}

3 Create some future climates

We can create some false future summer conditions, based on a pessimistic increase of 5°C in one century.

For spring frost risk and spring temperature, we may simply apply the observed trends.

4 Create new trees to predict

trees_per_stand <- 100
unique_stands <- "Benwell"
newtree_stand_idxs <- rep(which(unique_stands==unique_stands), each = trees_per_stand)
N_newtrees <- length(newtree_stand_idxs)
N_max_newyears <- length(years_to_predict)
first_newyear <- min(years_to_predict)
newyears <- c()
newprevsummer_temps <-c()
newspring_temps <-c()
newgdd_lastfrost <-c()
N_newyears <- c()
idx <- 1
newtree_start_idxs <- c()
newtree_end_idxs <- c()
for (i in 1:N_newtrees) {
  
  newyears_tree <- years_to_predict-first_newyear+1
  newyears <- c(newyears, newyears_tree)
  
  N_newyears_tree <- length(newyears_tree)
  N_newyears <- c(N_newyears, N_newyears_tree)
  
  newprevsummer_temps_tree <- summertemp
  newprevsummer_temps <- c(newprevsummer_temps, newprevsummer_temps_tree)
  
  newspring_temps_tree <- springtemp
  newspring_temps <- c(newspring_temps, newspring_temps_tree)

  newgdd_lastfrost_tree <- frostgdd/10
  newgdd_lastfrost <- c(newgdd_lastfrost, newgdd_lastfrost_tree)
  
  newtree_start_idxs <- c(newtree_start_idxs, idx)
  idx <- idx + N_newyears_tree
  newtree_end_idxs <- c(newtree_end_idxs, idx - 1)
}

5 Posterior quantification

N <- length(years)
Nnew <- length(newyears)
data <- mget(c('N', 'N_trees', 'N_max_years',
               'N_years', 'tree_start_idxs', 'tree_end_idxs',
               'seed_counts', 'years', 
               'prevsummer_temps', 'spring_temps', 'gdd_lastfrost',
               # for predictions
               'Nnew', 'N_newtrees', 'N_max_newyears', 'newtree_stand_idxs',
               'N_newyears', 'newtree_start_idxs', 'newtree_end_idxs','newyears', 
               'newprevsummer_temps', 'newgdd_lastfrost', 'newspring_temps'
))

# Posterior quantification
if(FALSE){
  fit <- stan(file= file.path(wd, "stan", "paper_models", "model2_treelevel_zinb_missing_rescaled_summertempfull_springfrost_springtemp_constrainedslopes_wpred_breakdownpred_standlevel.stan"),
              data=data, seed=5838299, chain = 4, cores = 4,
              warmup=1000, iter=2024, refresh=100)
  saveRDS(fit, file = file.path(wd, 'output', 'fit_25sept2025_fullmodel_constrainedslopes.rds'))
  diagnostics <- util$extract_hmc_diagnostics(fit)
  util$check_all_hmc_diagnostics(diagnostics)
  samples <- util$extract_expectand_vals(fit)
  base_samples <- util$filter_expectands(samples,
                                         c('lambda1', 'theta1', 'psi1',
                                           'lambda20', 'psi2', 
                                           'beta_lambda2_frost', 'beta_lambda2_spring',  
                                           'rho0', 'beta_nm_m', 'beta_m_m',
                                           'tau_nm_m0', 'tau_m_m0'))
  util$check_all_expectand_diagnostics(base_samples)
  
  fit <- stan(file= file.path(wd, "stan", "paper_models", "model2_treelevel_zinb_missing_rescaled_summertemp_springfrost_springtemp_constrainedslopes_wpred.stan"),
              data=data, seed=5838299, chain = 4, cores = 4,
              warmup=1000, iter=2024, refresh=100)
  saveRDS(fit, file = file.path(wd, 'output', 'fit_25sept2025_biologymodel_constrainedslopes.rds'))
  diagnostics <- util$extract_hmc_diagnostics(fit)
  util$check_all_hmc_diagnostics(diagnostics)
  samples <- util$extract_expectand_vals(fit)
  base_samples <- util$filter_expectands(samples,
                                         c('lambda1', 'theta1', 'psi1',
                                           'lambda20', 'psi2', 
                                           'beta_lambda2_frost', 'beta_lambda2_spring', 
                                           'rho0', 'beta_nm_m', 
                                           'tau_nm_m0', 'tau_m_m0'))
  util$check_all_expectand_diagnostics(base_samples)
  
  fit <- stan(file= file.path(wd, "stan", "paper_models", "model2_treelevel_zinb_missing_rescaled_summertemp_springfrost_springtemp_wpred_standlevel.stan"),
              data=data, seed=5838299, chain = 4, cores = 4,
              warmup=1000, iter=2024, refresh=100)
  saveRDS(fit, file = file.path(wd, 'output', 'fit_25sept2025_biologymodel.rds'))
  diagnostics <- util$extract_hmc_diagnostics(fit)
  util$check_all_hmc_diagnostics(diagnostics)
  samples <- util$extract_expectand_vals(fit)
  base_samples <- util$filter_expectands(samples,
                                         c('lambda1', 'theta1', 'psi1',
                                           'lambda20', 'psi2', 
                                           'beta_lambda2_frost', 'beta_lambda2_spring', 
                                           'rho0', 'beta_nm_m', 
                                           'tau_nm_m0', 'tau_m_m0'))
  util$check_all_expectand_diagnostics(base_samples)
}else{
  fit_full_const <- readRDS(file.path(wd, 'output', 'fit_25sept2025_fullmodel_constrainedslopes.rds'))
  fit_biol_const <- readRDS(file = file.path(wd, 'output', 'fit_25sept2025_biologymodel_constrainedslopes.rds'))
  fit_biol <- readRDS(file = file.path(wd, 'output', 'fit_25sept2025_biologymodel.rds'))
}

6 Retrodictive check

6.1 Across all trees

samples <- util$extract_expectand_vals(fit_biol)


# Retrodictive check
observed_idxs <- c()
for (t in 1:data$N_trees){
  idxs <- tree_start_idxs[t]:tree_end_idxs[t]
  observed_idxs_tree <- N_max_years*(t-1)+years[idxs]
  observed_idxs <- c(observed_idxs, observed_idxs_tree)
}
par(mfrow=c(1, 1), mar = c(4,4,2,2))
names <- sapply(observed_idxs, function(n) paste0('seed_counts_pred[',n,']'))
util$plot_hist_quantiles(samples[names], 'seed_counts_pred', 0, 340, 20,
                         baseline_values=data$seed_counts, 
                         xlab="Seed counts")
Warning in check_bin_containment(bin_min, bin_max, collapsed_values,
"predictive value"): 95576 predictive values (1.6%) fell above the binning.
Warning in check_bin_containment(bin_min, bin_max, baseline_values, "observed
value"): 16 observed values (1.1%) fell above the binning.

6.2 For some individual trees

set.seed(123456)
par(mfrow=c(3, 3), mar = c(4,4,2,2))
for (t in sample(1:data$N_trees,9)) {
  
  idxs_tree <-(1+N_max_years*(t-1)):(N_max_years*t)
  observed_idxs_tree <- tree_start_idxs[t]:tree_end_idxs[t]
  observed_flags <- years[tree_start_idxs[t]:tree_end_idxs[t]]
  
  names <- sapply(idxs_tree,
                  function(n) paste0('seed_counts_pred[', n, ']'))
  xlab="Year"
  xticklabs=first_year:last_year
  ylab="Seed Counts"
  display_ylim=c(0, 400)
  main=paste("Tree", uniq_tree_ids[t])
  
  # Construct bins
  N <- length(names)
  bin_min <- 0.5
  bin_max <- N + 0.5
  bin_delta <- 1
  breaks <- seq(bin_min, bin_max, bin_delta)
  
  plot_config <- util$configure_bin_plotting(breaks)
  plot_idxs <- plot_config[[1]]
  plot_xs <- plot_config[[2]]
  
  # Construct marginal quantiles
  probs <- c(0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9)
  calc <- function(n) {
    util$ensemble_mcmc_quantile_est(samples[[names[n]]], probs)
  }
  quantiles <- sapply(1:N, calc)
  plot_quantiles <- do.call(cbind, lapply(plot_idxs,
                                          function(n) quantiles[1:9, n]))
  
  delta <- 0.05 * (display_ylim[2] - display_ylim[1])
  display_ylim[1] <- display_ylim[1] - delta
  display_ylim[2] <- display_ylim[2] + delta      
  
  plot(1, type="n", main=main,
       xlim=c(bin_min, bin_max), xlab=xlab, xaxt="n",
       ylim=display_ylim, ylab=ylab)
  axis(1, at=1:N, labels=xticklabs)    
  
  for(n in 1:N){
    
    idplot <- c(n*2-1, n*2)
    polygon(c(plot_xs[idplot], rev(plot_xs[idplot])),
            c(plot_quantiles[1,idplot], rev(plot_quantiles[9,idplot])),
            col = ifelse(n%in%observed_flags, util$c_light, 'grey90'), border = NA)
    polygon(c(plot_xs[idplot], rev(plot_xs[idplot])),
            c(plot_quantiles[2,idplot], rev(plot_quantiles[8,idplot])),
            col = ifelse(n%in%observed_flags, util$c_light_highlight, 'grey80'), border = NA)
    polygon(c(plot_xs[idplot], rev(plot_xs[idplot])),
            c(plot_quantiles[3,idplot], rev(plot_quantiles[7,idplot])),
            col = ifelse(n%in%observed_flags, util$c_mid, 'grey70'), border = NA)
    polygon(c(plot_xs[idplot], rev(plot_xs[idplot])),
            c(plot_quantiles[4,idplot], rev(plot_quantiles[6,idplot])),
            col = ifelse(n%in%observed_flags, util$c_mid_highlight, 'grey60'), border = NA)
    
    lines(plot_xs[idplot], plot_quantiles[5, idplot],
          col= ifelse(n%in%observed_flags,util$c_dark, 'grey50'), lwd=2)
  }
  
  for(i in observed_idxs_tree) {
    lines(c(years[i] - 0.5, years[i] + 0.5),
          rep(seed_counts[i], 2),
          col="white", lwd=4)
    lines(c(years[i] - 0.5, years[i] + 0.5),
          rep(data$seed_counts[i], 2),
          col="black", lwd=2)
  }
  
}

7 Posterior inferences

First let’s look at the transition matrix parameters!
\rho_0 is the initial masting probability, \tau_0^{\text{M} \rightarrow \text{M}} is the probability of staying in a masting state, \tau_0^{\text{NM} \rightarrow \text{M}} is the probability of transition from non-masting to masting (at 15°C, a cold summer), modified by summer temperature with the slope \beta_\text{summer}^{\text{NM} \rightarrow \text{M}}.

Then look at the seed production parameters!
\lambda_\text{NM} is the mean production of seeds in a non-masting state, with an overdispersion (inverse of precision) \psi_\text{NM}. \theta_\text{NM} is the probability to observe a zero in a non-masting state (zero-inflation). \lambda_\text{NM} is the mean production of seeds in a masting state (at some baseline conditions…), increase or decrease by spring frost risk with \beta_\text{frost}^\text{M} and spring temperature with \beta_\text{spring}^\text{M}. \psi_\text{M} is the overdispersion parameter for masting.

8 Synchrony

8.1 Across all trees

8.2 Tree-level state, by year